An enhanced WAASB: relationship information can improve multi-trait multi-environment selection of untested maize (Zea mays L.) lines
1 Overview
This document describes the analysis of a maize dataset with 160 three-way hybrids evaluated in eight trials (environments). The trials’ design was an alpha-lattice with two replications, 11 blocks and 15 genotypes per block. Here are the packages used:
Codes
library(tidyverse)
library(asreml)
library(AGHmatrix)
library(metan)
library(ggrepel)
library(ggpubr)
library(ComplexHeatmap)Here is the dataset, its structure, and the connectivity between trials (Figure 1):
Codes
pheno = read.csv2(file = "../data/pheno.csv")
pheno = transform(pheno,
AP = as.numeric(AP),
AE = as.numeric(AE),
PG = as.numeric(PG))
str(pheno)'data.frame': 2630 obs. of 11 variables:
$ env : chr "COAN21" "COAN21" "COAN21" "COAN21" ...
$ block : int 3 5 6 2 14 1 11 2 15 5 ...
$ repl : int 1 1 1 1 1 1 1 1 1 1 ...
$ check : int 0 0 0 0 0 0 0 0 0 0 ...
$ hybrid : chr "(VML003/VML022)//VML090" "(VML003/VML022)//VML115" "(VML003/VML090)//VML022" "(VML003/VML165)//VML016" ...
$ hyb.cod: chr "94V3001" "94V3002" "94V3003" "94V3004" ...
$ FM : int 63 67 63 64 62 63 59 63 63 62 ...
$ FF : int 60 64 67 63 62 64 59 60 66 62 ...
$ AP : num 222 206 210 230 220 ...
$ AE : num 110.8 99.5 109.5 110.8 97.8 ...
$ PG : num 10051 8170 8795 12435 10653 ...
Codes
pheno$PG = pheno$PG * 0.001
conec = as.data.frame(table(unique(pheno[,c('env', 'hyb.cod')])$hyb.cod,
unique(pheno[,c('env', 'hyb.cod')])$env))
conec |> ggplot(aes(y = Var1, x = Var2, fill = as.factor(Freq))) +
geom_tile() +
scale_fill_manual(values = c("#ffff99", "#386cb0"),
labels = c("Abscent", "Present"),na.value = 'white') +
theme_minimal() + theme(axis.text.y = element_blank(),
legend.position = 'top') +
labs(x = "Enviroment", y = "Hybrids", fill = "",
caption = paste0("Connectivity: ", round(sum(conec$Freq)/nrow(conec) *
100,2),"%"))1.1 Pedigree
There are 19 lines involved in the three-way crossings. These lines were first inter-crossed to originate 61 simple hybrids. Then, these hybrid were crossed with lines different from their parents to generate 160 three-way hybrids. Here is the pedigree and a heatmap representation of the additive relationship matrix (Figure 2).
Codes
ped = read.csv(file = "../data/ped.csv")
Amat = Amatrix(data = ped, ploidy = 2)Verifying conflicting data...
Organizing data...
Your data was chronologically organized with success.
Constructing matrix A using ploidy = 2
Completed! Time = 0 minutes
Codes
Heatmap(
Amat,
show_row_names = FALSE,
show_column_names = FALSE,
show_row_dend = FALSE,
column_dend_side = 'bottom',
show_parent_dend_line = FALSE,
show_column_dend = TRUE,
# row_order = rownames(Amat),
# column_order = rownames(Amat),
row_split = case_when(
grepl("94V", rownames(Amat)) ~ "Gen. 2: 3WC",
grepl("/", rownames(Amat)) ~ "Gen. 1: SCH",
.default = 'Gen. 0: Lines'
),
column_split = case_when(
grepl("94V", colnames(Amat)) ~ "Gen. 2: 3WC",
grepl("/", colnames(Amat)) ~ "Gen. 1: SCH",
.default = 'Gen. 0: Lines'
),
col = viridis::mako(n = 10),
heatmap_legend_param = list(title = "Relationship"),
row_title_gp = gpar(fontsize = 10),
column_title_gp = gpar(fontsize = 10)
)2 Single-environment models
The following model was used to fit single-environment models for each trait:
\[ \mathbf{y} = \mathbf{1}\mu + \mathbf{X}\mathbf{r} + \mathbf{Z}_1\mathbf{g} + \mathbf{Z}_2\mathbf{b} + \boldsymbol{\varepsilon} \tag{1}\]
where \(\mathbf{y}\) is the \(N_m \times 1\) vector of phenotypic records, \(\mu\) is the intercept, \(\mathbf{r}\) is the \(2\times 1\) vector of fixed repetition effects, \(\mathbf{g}\) is the \(V \times 1\) vector of random additive-genetic effects, distributed as \(\mathbf{g} \sim \mathcal{N} \left( \mathbf{0}, \sigma^2_g \mathbf{A} \right)\), where \(\mathbf{0}\) is a vector of zeros, \(\sigma^2_g\) is the additive-genetic variance and \(\mathbf{A}\) is the \(V \times V\) numerator relationship matrix; \(\mathbf{b}\) is the \(30 \times 1\) vector of random block effects, distributed as \(\mathbf{b} \sim \mathcal{N} \left( \mathbf{0}, \sigma^2_b \mathbf{I}_{30} \right)\), with \(\sigma^2_b\) representing the variance of blocks, and \(\mathbf{I}_{30}\), an identity matrix whose order is given by its subscript; and \(\boldsymbol{\varepsilon}\) is the \(N_m \times 1\) vector of residual effects \(\left[\boldsymbol{\varepsilon} \sim \mathcal{N} \left( \mathbf{0}, \sigma^2_\varepsilon \mathbf{I}_{N_m} \right) \text{, with } \sigma^2_\varepsilon \text{ being the residual variance}\right]\). The upper case letters \(\mathbf{X}\), \(\mathbf{Z}_1\) and \(\mathbf{Z}_2\) are the incidence matrix, whose number of columns is the same as the size of its respective vector, and number of rows is \(N_m\).
The process was automatized using a loop:
Codes
BLUPs = list()
mod.param = list()
for (i in unique(pheno$env)) {
bluptemp = list()
vc = list()
param = list()
x = transform(
droplevels(pheno[pheno$env == i, ]),
repl = as.factor(repl),
block = as.factor(block),
geno = as.factor(ifelse(check == 0, hyb.cod, 0)),
check.cod = as.factor(ifelse(check == 1, hyb.cod, 0)),
check = as.factor(check),
hyb.cod = as.factor(hyb.cod)
)
pedaux = ped[which(ped$geno %in% x$hyb.cod), ]
ainv = ainverse(ped = rbind(pedaux, data.frame(
geno = unique(c(pedaux$mum, pedaux$dad))[-1],
mum = 0,
dad = 0
)))
temp = unique(c(pedaux$mum, pedaux$dad))[-1]
aux = cbind(temp[which(grepl("/", temp))],
do.call(rbind, strsplit(temp[grep("/", temp)], split = '/')))
colnames(aux) = c('geno', "mum", "dad")
pedaux = rbind(data.frame(geno = temp[which(!grepl("/", temp))],
mum = 0, dad = 0), aux, pedaux)
rm(temp, aux)
ainv = ainverse(pedaux)
for(j in c("AP", "PG", "AE")){
message("\n \n ===> Environment ", i, "; Trait ", j, '\n \n')
form = as.formula(paste(j,'~repl'))
mod = asreml(fixed = form, random = ~ block + vm(hyb.cod, ainv), data = x)
if(!mod$converge) next
if(any(na.exclude(mod$vparameters.pc > 1))) mod = up.mod(mod)
varcomp = summary(mod)$varcomp
rownames(varcomp)[grep('hyb.cod', rownames(varcomp))] = 'geno'
rownames(varcomp)[grep('block', rownames(varcomp))] = 'block'
rownames(varcomp)[which(grepl('units', rownames(varcomp)))] = 'error'
pred = predict(mod, classify = 'hyb.cod', sed = TRUE)
bluptemp[[j]] = pred$pvals[,-4]
h2 = varcomp['geno',1]/(varcomp['geno',1] +
# (varcomp['block',1]/nlevels(x$block)) +
(varcomp['error', 1]/(nlevels(x$repl))))
# MVdelta = mean((pred$sed^2)[upper.tri(pred$sed^2,diag = F)])
# h2 = 1-(MVdelta/(2*varcomp['geno', 1]))
lrt.geno = lrt(mod, asreml(fixed = form, random = ~ block, data = x))
lrt.block = lrt(mod, asreml(fixed = form,
random = ~ vm(hyb.cod, ainv), data = x))
varcomp = left_join(varcomp |> rownames_to_column("effect"),
data.frame(
effect = c("geno", 'block'),
LRT = c(lrt.geno$`Pr(Chisq)`, lrt.block$`Pr(Chisq)`)
),
by = 'effect') |> mutate(env = i)
CV = sqrt(varcomp[grep("error", varcomp$effect), 2])/
mean(x[,j], na.rm = TRUE)
vc[[j]] = varcomp
param[[j]] = data.frame(param = c('h2', "CV"),
value = c(h2, CV),
env = i)
rm(mod, CV, varcomp, lrt.geno, lrt.block, h2)
}
BLUPs[[i]] = do.call(rbind, bluptemp) |>
rownames_to_column('trait') |>
mutate_at('trait', str_replace, '\\..*', '')
mod.param[[i]] = list(
varcomp = do.call(rbind, vc) |> rownames_to_column('trait') |>
mutate_at('trait', str_replace, '\\..*', ''),
param = do.call(rbind, param) |> rownames_to_column('trait') |>
mutate_at('trait', str_replace, '\\..*', '')
)
rm(x, ainv, pedaux)
}
varcomp = do.call(rbind, lapply(mod.param, function(x) x$varcomp))
param = do.call(rbind, lapply(mod.param, function(x) x$param))
rm(i,j)2.1 Results
Codes
ggarrange(
varcomp |> ggplot(aes(
x = factor(effect, levels = c("geno", 'block', 'error')), y = component
)) +
facet_grid(trait ~ env, scales = 'free_y', labeller = labeller(
.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (ton/ha)")
)) +
geom_col(
data = subset(varcomp, LRT<0.05 &
!grepl('error', effect)),
fill = "skyblue",
color = 'black'
) +
geom_col(
data = subset(varcomp, LRT>0.05 &
!grepl('error', effect)),
color = 'black',
fill = "red4"
) +
geom_col(
data = subset(varcomp, grepl('error', effect)),
fill = 'black',
color = 'black'
) +
theme_bw() + theme(
legend.position = 'top',
axis.text.x = element_text(
angle = 90,
vjust = .5,
hjust = 1
)
) +
labs(x = "Effect", y = "Variance component estimate") +
geom_text(aes(
label = ifelse(LRT <= 0.05, "*", NA),
), size = 5) +
scale_x_discrete(
labels = c(
'block' = "Block",
'geno' = "Genetic",
'error' = "Residual"
)
),
param |> filter(param != "H2") |>
ggplot(aes(x = param, y = value)) +
facet_grid(trait ~ env, labeller = labeller(
.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (kg/ha)")
)) +
geom_col(aes(fill = param), color = 'black') +
geom_text(aes(label = round(value, 3)), nudge_y = .08) +
scale_fill_viridis_d() +
labs(x = "Parameter", y = "") +
theme_bw() + theme(legend.position = 'none') +
scale_x_discrete(labels = c('CV' = "CV", 'h2' = expression(h^2))),
nrow = 2,
labels = "AUTO",
common.legend = TRUE
)Codes
BLUPs = lapply(BLUPs, function(x) {
x$status = case_when(
grepl("\\/V", x$hyb.cod) ~ "Shybrid",
grepl("94V", x$hyb.cod) ~ "Thybrid",
grepl("VML", x$hyb.cod) &
!grepl("\\/", x$hyb.cod) ~ 'line',
.default = "check"
)
x
})
do.call(rbind, BLUPs) |> rownames_to_column("env") |>
mutate_at("env", str_replace, "\\..*", "") |>
ggplot(aes(y = predicted.value, x = env)) +
geom_point(
aes(shape = status, colour = status, fill = status, size = status),
position = position_jitter(),
alpha = .6
) +
geom_violin(alpha = .5) +
facet_wrap(
. ~ trait,
scales = 'free_y',
nrow = 3,
labeller = labeller(.rows = c(
"AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (ton/ha)"
))
) +
theme_bw() + theme(legend.position = 'top', legend.title = element_blank()) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
labs(x = "Environment", y = "BLUP")Codes
ggarrange(
ggarrange(
varcomp |> ggplot(aes(
x = factor(effect, levels = c("geno", 'block', 'error')), y = component
)) +
facet_grid(trait ~ env, scales = 'free_y', labeller = labeller(
.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (ton/ha)")
)) +
geom_col(
data = subset(varcomp, LRT<0.05 &
!grepl('error', effect)),
# aes(fill = "*"),
fill = "skyblue",
color = 'black'
) +
geom_col(
data = subset(varcomp, LRT>0.05 &
!grepl('error', effect)),
# aes(fill = "ns"),
color = 'black',
fill = "red4"
) +
geom_col(
data = subset(varcomp, grepl('error', effect)),
fill = 'black',
color = 'black'
) +
# scale_fill_manual("LRT", values = c("steelblue", "red4")) +
theme_bw() + theme(
legend.position = 'top',
axis.text.x = element_text(
angle = 90,
vjust = .5,
hjust = 1
)
) +
labs(x = "Effect", y = "Variance component estimate") +
geom_text(aes(
label = ifelse(LRT <= 0.05, "*", NA),
# y = component
), size = 5) +
scale_x_discrete(
labels = c(
'block' = "Block",
'geno' = "Genetic",
'error' = "Residual"
)
),
param |> filter(param != "H2") |>
ggplot(aes(x = param, y = value)) +
facet_grid(trait ~ env, labeller = labeller(
.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (kg/ha)")
)) +
geom_col(aes(fill = param), color = 'black') +
geom_text(aes(label = round(value, 2)), nudge_y = .07, size = 3) +
scale_fill_viridis_d() +
labs(x = "Parameter", y = "") +
theme_bw() + theme(legend.position = 'none') +
scale_x_discrete(labels = c('CV' = "CV", 'h2' = expression(h^2))),
nrow = 2,
labels = "AUTO",
common.legend = FALSE
),
do.call(rbind, BLUPs) |> rownames_to_column("env") |>
mutate_at("env", str_replace, "\\..*", "") |>
ggplot(aes(y = predicted.value, x = env)) +
geom_point(
aes(shape = status, colour = status, fill = status, size = status),
position = position_jitter(),
alpha = .6
) +
geom_violin(alpha = .5) +
facet_wrap(
. ~ trait,
scales = 'free_y',
nrow = 3,
labeller = labeller(.rows = c(
"AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (ton/ha)"
))
) +
theme_bw() + theme(legend.position = 'bottom', legend.title = element_blank(),plot.tag = element_text(face = "bold")) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
labs(x = "Environment", y = "BLUP", tag = "C")
)3 Multi-environment models
\[ \mathbf{y} = \mathbf{1}\mu + \mathbf{X}_1\mathbf{e} + \mathbf{X}_2\mathbf{r} + \mathbf{Z}_1\mathbf{g} + \mathbf{Z}_2\mathbf{ge} + \mathbf{Z}_3\mathbf{b} + \boldsymbol{\varepsilon} \tag{2}\]
where \(\mathbf{e}\) is the \(M \times 1\) vector of fixed environment effects, and \(\mathbf{ge}\) is the \(VM \times 1\) vector of genotype-by-environment interaction (GEI) effects, distributed as \(\mathbf{ge} \sim \mathcal{N}\left(\mathbf{0}, \sigma^2_{ge} \mathbf{I}_{M} \otimes \mathbf{A}\right)\), where \(\sigma^2_{ge}\) is the GEI variance. All the other terms were previously described in Equation 1, and have new dimensions in Equation 2. \(\mathbf{y}\) is a \(N \times 1\) vector, so all incidence matrices have \(N\) rows. \(\mathbf{r}\) and \(\mathbf{b}\) have nested effects, so their original dimension is multiplied by the number of environments. Finally, we assumed that \(\boldsymbol{\varepsilon}\) followed a block-diagonal structure, i.e. \(\boldsymbol{\varepsilon} \sim \mathcal{N} \left(\mathbf{0}, \oplus_{m=1}^M \sigma^2_{\varepsilon_m} \otimes \mathbf{I}_{N_m} \right)\), where \(\oplus\) is the direct sum symbol.
A model was fitted for each trait, using a loop:
Codes
pheno = pheno[order(pheno$env),]
pheno = transform(pheno, repl = as.factor(repl),
block = as.factor(block),
geno = as.factor(ifelse(check == 0, hyb.cod, 0)),
check.cod = as.factor(ifelse(check == 1, hyb.cod, 0)),
check = as.factor(check),
env = as.factor(env),
hyb.cod = as.factor(hyb.cod))
ainv = ainverse(pedigree = ped)
vc = list()
LR = list()
main = list()
intmat = list()
for(i in c("PG", "AE", "AP")){
form = as.formula(paste(i, '~env + repl:env'))
mod = asreml(
fixed = form,
random = ~ vm(hyb.cod, ainv) + vm(hyb.cod, ainv):idv(env) +
block:env,
residual = ~ dsum( ~ units | env),
data = pheno
)
if (!mod$converge)
next
if (any(na.exclude(mod$vparameters.pc > 1)))
mod = up.mod(mod)
LR = list(G = lrt(
mod,
asreml(
fixed = form,
random = ~ block:env,
residual = ~ dsum( ~ units | env),
data = pheno
)
), GE = lrt(
mod,
asreml(
fixed = form,
random = ~ vm(hyb.cod, ainv) + block:env,
residual = ~ dsum( ~ units | env),
data = pheno
)
))
LR[[i]] = do.call(rbind, LR)
vc[[i]] = summary(mod)$varcomp |> mutate(trait = i)
blu = coef(mod)$random
intercept = coef(mod)$fixed[1]
mainbl = blu[which(grepl("hyb.cod", rownames(blu)) &
!grepl("env", rownames(blu))), ]
names(mainbl) = gsub(".*\\)_", "", names(mainbl))
main[[i]] = data.frame(
geno = names(mainbl),
g = mainbl,
mug = mainbl + intercept,
row.names = NULL
)
intbl = blu[which(grepl("hyb.cod", rownames(blu)) &
grepl("env", rownames(blu))), ]
names(intbl) = gsub(".*\\)_", "", names(intbl))
intbl = data.frame(do.call(rbind,strsplit(names(intbl), split = ":env_")),
intbl, row.names = NULL)
intbl = reshape(data = intbl, idvar = "X1", timevar = "X2", direction = "wide")
rownames(intbl) = intbl[,1]; intbl = intbl[,-1]
colnames(intbl) = gsub("intbl.", paste0(i,"@"), colnames(intbl))
intmat[[i]] = intbl
rm(mod, intbl, mainbl, form, i)
}3.1 Decomposition of the genotype-by-environment interaction
We computed the part of GEI due to scale-type interaction as follows:
\[ \sigma^2_{ge_{scale}} = 1 - \frac{Var\left( \sqrt{\sigma^2_{g_m}} \right)}{\sigma^2_{ge}} \tag{3}\]
Codes
indvargen = varcomp |> filter(effect == 'geno')
gedec = lapply(split(indvargen, indvargen$trait), function(x){
1 - var(sqrt(x$component))/vc[[which(names(vc) == unique(x$trait))]][3,1]
})
vc.perc = lapply(vc, function(x){
aux = x |> rownames_to_column('effect') |>
mutate(effect = case_when(
grepl('hyb.cod', effect) & !grepl("env", effect) ~ "gen",
grepl('hyb.cod', effect) & grepl("env", effect) ~ "gen:env",
.default = effect
)) |>
filter(grepl('gen', effect))
scale = aux[2, 2] * gedec[[which(names(gedec) == unique(aux$trait))]]
rank = aux[2, 2] * (1 - gedec[[which(names(gedec) == unique(aux$trait))]])
data.frame(
effect = c("main", 'rank-type', 'scale-type'),
perc = c(aux[1,2]/sum(aux[,2]), rank/sum(aux[,2]), scale/sum(aux[,2]))
)
})
vc.perc = do.call(rbind, vc.perc) |> rownames_to_column('trait') |>
mutate_at('trait', str_replace, '\\..*', '')
ggplot(data = vc.perc, aes(x = trait, y = perc, fill = effect)) +
geom_col(colour = "black") +
scale_y_continuous(labels = scales::percent) +
theme_bw() + theme(legend.position = 'top', legend.title = element_blank()) +
labs(x = "Trait", y = "Total genetic effects") +
scale_fill_viridis_d(
option = 'mako',
labels = c(
"Main effect",
"Crossover interaction",
"Non-crossover interaction"
),
direction = -1
) +
scale_x_discrete(labels = c("AE" = "EH", "AP" = "PH", "PG" = "GY"))3.2 AMMI
We decomposed the GEI matrix using singular value decomposition:
\[ \mathbf{\Phi} = \mathbf{U} \mathbf{\Lambda} \mathbf{V}^\prime \tag{4}\]
where \(\mathbf{\Lambda}\) is a \(P \times P\) diagonal matrix of singular values, with \(P \leq \min\left(V -1, M-1\right)\) and \(\mathbf{U}\) and \(\mathbf{V}\) are orthonormal matrices with the singular vectors of \(\mathbf{\Phi}\mathbf{\Phi}^\prime\) and \(\mathbf{\Phi}^\prime\mathbf{\Phi}\), respectively. The matrix of principal component scores for genotypes, and loadings for environments, were computed as \(\mathbf{W}_g = \mathbf{U} \mathbf{\Lambda}\) (dimension \(V \times P\)) and \(\mathbf{W}_e = \mathbf{V} \frac{\mathbf{\Lambda}}{\sqrt{M-1}}\) (dimension \(M \times P\)), respectively. We used the first two rows of \(\mathbf{W}_g\) and \(\mathbf{W}_e\) to perform AMMI-like analyses, i.e., AMMI1 (first principal component vs trait, Figure 6) and AMMI2 (first vs second principal components, Figure 7).
Codes
minimum <- min(nrow(Amat), nlevels(pheno$env)) - 1
Eigenvalue = data.frame()
pc.df = lapply(intmat, function(x) {
trait = do.call(rbind, str_split(colnames(x), pattern = "@"))
colnames(x) = trait[, 2]
trait = unique(trait[, 1])
X_centered <- scale(x, center = TRUE, scale = FALSE)
svd_res <- svd(X_centered)
svd_res$d = svd_res$d[1:minimum]
svd_res$u = svd_res$u[, 1:minimum]
svd_res$v = svd_res$v[, 1:minimum]
Eigenvalue <<- rbind(
Eigenvalue,
data.frame(Eigenvalue = svd_res$d^2) %>%
add_cols(
Proportion = svd_res$d^2 / sum(svd_res$d^2) *
100,
Accumulated = cumsum(Proportion),
PC = paste("PC", 1:minimum, sep = "")
) %>%
column_to_first(PC) |> mutate(trait = trait)
)
SCOREG <- svd_res$u %*% diag(svd_res$d) %>% as.data.frame() %>% add_cols(GEN = rownames(x), .before = 1)
LOADE <- svd_res$v %*% (diag(svd_res$d) / sqrt(length(svd_res$d) - 1)) %>% as.data.frame() %>% add_cols(ENV = colnames(x), .before = 1)
pc.df = rbind(
data.frame(LOADE, mug = tapply(pheno[, trait], pheno$env, function(x)
mean(x, na.rm = TRUE))) |>
rename(level = ENV) |> mutate(type = "env"),
as.data.frame(
SCOREG |> left_join(main[[trait]][, c("geno", 'mug')], by = c("GEN" = 'geno')) |>
rename(level = GEN) |>
mutate(
type = case_when(
grepl("\\/V", level) ~ "Shybrid",
grepl("94V", level) ~ "Thybrid",
grepl("VML", level) &
!grepl("\\/", level) ~ 'line',
.default = "check"
)
)
)
) |> mutate(trait = trait)
colnames(pc.df)[which(grepl("V", colnames(pc.df)))] = paste0("PC", 1:(nlevels(pheno$env) -
1))
pc.df
})
pc.df = do.call(rbind, pc.df)Codes
pc.df |> ggplot(aes(x = mug, y = PC1)) +
facet_wrap(.~trait, scales = 'free',
labeller = labeller(.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)",
"PG" = "GY (kg/ha)"))) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(data = pc.df |> reframe(med = mean(mug), .by = trait),
aes(xintercept = med), linetype = "dashed") +
geom_point(aes(shape = type, colour = type, size = type, alpha = type, fill = type)) +
theme_bw() +
theme(legend.position = 'top', axis.title.x = element_blank(), legend.title = element_blank()) +
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_alpha_manual(values = c("env" = 1, "check" = .8,
"line" = .8, "Shybrid" = .6, "Thybrid" = .6),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
geom_segment(data = subset(pc.df, type == "env") |>
left_join(pc.df |> reframe(med = mean(mug), .by = trait)), show.legend = FALSE,
aes(x = med, y = 0, yend = PC1,
xend = mug, colour = type)) +
labs(y = paste0("PC1 (",round(Eigenvalue$Proportion[1], 2),"%)")) +
geom_label_repel(data = subset(pc.df, type == "env"), aes(label = level),
size = 2, alpha = .7) Codes
pc.df |> ggplot(aes(x = PC1, y = PC2)) +
facet_wrap(.~trait, scales = 'free',
labeller = labeller(.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)",
"PG" = "GY (kg/ha)"))) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_point(aes(shape = type, colour = type, size = type, alpha = type, fill= type)) +
theme_bw() +
theme(legend.position = 'top', legend.title = element_blank()) +
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_alpha_manual(values = c("env" = 1, "check" = .8,
"line" = .8, "Shybrid" = .6, "Thybrid" = .6),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
geom_segment(data = subset(pc.df, type == "env"), show.legend = FALSE,
aes(x = 0, y = 0, xend = PC1, yend = PC2, colour = type)) +
labs(x = paste0("PC1 (",round(Eigenvalue$Proportion[1], 2),"%)"),
y = paste0("PC2 (",round(Eigenvalue$Proportion[2], 2),"%)")) +
geom_label_repel(data = subset(pc.df, type == "env"), aes(label = level),
size = 2, alpha = .7) +
ggh4x::facetted_pos_scales(
x = list(
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]))),
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]))),
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")])))
),
y = list(
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]))),
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]))),
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")])))
)
)3.3 WAASB
We used the same matrices employed in the AMMI analyses to compute the weighted average of absolute scores of GEI’s BLUPs (WAASB), given by:
\[ WAASB_v = \frac{\sum_{p=1}^P \vert w_{pv} \times \tau_p \vert}{\sum_{p=1}^P \tau_p} \tag{5}\]
where \(w_{pv}\) is the score of candidate \(v\) in the \(p^{th}\) component, and \(\tau_p\) is the variance explained by the \(p^{th}\) component. The lower the WAASB, the more stable the genotype (Figure 8). Conclusions drawn from WAASB have one advantage over AMMI: they are based on all the principal components, rather than only the first two.
Codes
pc.df = do.call(rbind, lapply(split(pc.df, pc.df$trait), function(x) {
trait = unique(x$trait)
aux = as.matrix(x |> column_to_rownames('level') |> select(-type, -mug, -trait))
WAASB = apply(aux, 1, function(x)
abs(x) %*% as.matrix(Eigenvalue[which(Eigenvalue$trait == trait), 2]) /
sum(as.matrix(Eigenvalue[which(Eigenvalue$trait == trait), 2])))
left_join(x, as.data.frame(WAASB) |> rownames_to_column("level"), by = 'level')
}))
pc.df |> filter(type != "env") |>
ggplot(aes(x = mug, y = WAASB)) +
facet_wrap(. ~ trait, scales = 'free', labeller = labeller(.rows = c(
"AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (kg/ha)"
))) +
geom_hline(
data = pc.df |> reframe(med = mean(WAASB), .by = trait),
aes(yintercept = med),
linetype = "dashed"
) +
geom_vline(
data = pc.df |> reframe(med = mean(mug), .by = trait),
aes(xintercept = med),
linetype = "dashed"
) +
geom_point(aes(
shape = type,
colour = type,
size = type,
alpha = type,
fill = type
)) +
theme_bw() +
theme(
legend.position = 'top',
axis.title.x = element_blank(),
legend.title = element_blank()
) +
scale_size_manual(
values = c(
"env" = 2,
"check" = 2,
"line" = 2,
"Shybrid" = 1.3,
"Thybrid" = 1
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_colour_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_fill_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_shape_manual(
values = c(
"env" = 17,
"check" = 18,
"line" = 19,
"Shybrid" = 25,
"Thybrid" = 15
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_alpha_manual(
values = c(
"env" = 1,
"check" = .8,
"line" = .8,
"Shybrid" = .6,
"Thybrid" = .6
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) Codes
ggarrange(
pc.df |> ggplot(aes(x = mug, y = PC1)) +
facet_wrap(.~trait, scales = 'free',
labeller = labeller(.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)",
"PG" = "GY (kg/ha)"))) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(data = pc.df |> reframe(med = mean(mug), .by = trait),
aes(xintercept = med), linetype = "dashed") +
geom_point(aes(shape = type, colour = type, size = type, alpha = type, fill = type)) +
theme_bw() +
theme(legend.position = 'top', axis.title.x = element_blank(), legend.title = element_blank()) +
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_alpha_manual(values = c("env" = 1, "check" = .8,
"line" = .8, "Shybrid" = .6, "Thybrid" = .6),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
geom_segment(data = subset(pc.df, type == "env") |>
left_join(pc.df |> reframe(med = mean(mug), .by = trait)), show.legend = FALSE,
aes(x = med, y = 0, yend = PC1,
xend = mug, colour = type)) +
labs(y = paste0("PC1 (",round(Eigenvalue$Proportion[1], 2),"%)")) +
geom_label_repel(data = subset(pc.df, type == "env"), aes(label = level),
size = 2, alpha = .7),
pc.df |> ggplot(aes(x = PC1, y = PC2)) +
facet_wrap(.~trait, scales = 'free',
labeller = labeller(.rows = c("AE" = "EH (cm)", "AP" = "PH (cm)",
"PG" = "GY (kg/ha)"))) +
geom_hline(yintercept = 0, linetype = "dashed") +
geom_vline(xintercept = 0, linetype = "dashed") +
geom_point(aes(shape = type, colour = type, size = type, alpha = type, fill= type)) +
theme_bw() +
theme(legend.position = 'top', legend.title = element_blank()) +
scale_size_manual(values = c("env" = 2, "check" = 2,
"line" = 2, "Shybrid" = 1.3, "Thybrid" = 1),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_colour_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_fill_manual(values = c('env' = '#e41a1c', 'check' = '#377eb8',
'line' = '#4daf4a','Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"))+
scale_shape_manual(values = c("env" = 17, "check" = 18,
"line" = 19, "Shybrid" = 25, "Thybrid" = 15),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
scale_alpha_manual(values = c("env" = 1, "check" = .8,
"line" = .8, "Shybrid" = .6, "Thybrid" = .6),
labels = c("env"="Environment", "check"="Check",
"line"="Line", "Shybrid"="Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid")) +
geom_segment(data = subset(pc.df, type == "env"), show.legend = FALSE,
aes(x = 0, y = 0, xend = PC1, yend = PC2, colour = type)) +
labs(x = paste0("PC1 (",round(Eigenvalue$Proportion[1], 2),"%)"),
y = paste0("PC2 (",round(Eigenvalue$Proportion[2], 2),"%)")) +
geom_label_repel(data = subset(pc.df, type == "env"), aes(label = level),
size = 2, alpha = .7) +
ggh4x::facetted_pos_scales(
x = list(
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]))),
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]))),
scale_x_continuous(limits = c(min(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")])))
),
y = list(
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AE"),c("PC1", "PC2")]))),
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "AP"),c("PC1", "PC2")]))),
scale_y_continuous(limits = c(min(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")]),
max(pc.df[which(pc.df$trait == "PG"),c("PC1", "PC2")])))
)
),
pc.df |> filter(type != "env") |>
ggplot(aes(x = mug, y = WAASB)) +
facet_wrap(. ~ trait, scales = 'free', labeller = labeller(.rows = c(
"AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (kg/ha)"
))) +
geom_hline(
data = pc.df |> reframe(med = mean(WAASB), .by = trait),
aes(yintercept = med),
linetype = "dashed"
) +
geom_vline(
data = pc.df |> reframe(med = mean(mug), .by = trait),
aes(xintercept = med),
linetype = "dashed"
) +
geom_point(aes(
shape = type,
colour = type,
size = type,
alpha = type,
fill = type
)) +
theme_bw() +
theme(
legend.position = 'top',
axis.title.x = element_blank(),
legend.title = element_blank()
) +
scale_size_manual(
values = c(
"env" = 2,
"check" = 2,
"line" = 2,
"Shybrid" = 1.3,
"Thybrid" = 1
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_colour_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_fill_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_shape_manual(
values = c(
"env" = 17,
"check" = 18,
"line" = 19,
"Shybrid" = 25,
"Thybrid" = 15
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_alpha_manual(
values = c(
"env" = 1,
"check" = .8,
"line" = .8,
"Shybrid" = .6,
"Thybrid" = .6
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
)+
geom_label_repel(data = subset(pc.df, type == "line"),
aes(label = level), fill = "#4daf4a",
show.legend = FALSE, face = 'bold',
size = 2, alpha = .7, max.overlaps = 100) ,
common.legend = TRUE, labels = "AUTO", nrow = 3
)
pc.df |> filter(type != "env") |>
ggplot(aes(x = mug, y = WAASB)) +
facet_wrap(. ~ trait, scales = 'free', labeller = labeller(.rows = c(
"AE" = "EH (cm)", "AP" = "PH (cm)", "PG" = "GY (kg/ha)"
))) +
geom_hline(
data = pc.df |> reframe(med = mean(WAASB), .by = trait),
aes(yintercept = med),
linetype = "dashed"
) +
geom_vline(
data = pc.df |> reframe(med = mean(mug), .by = trait),
aes(xintercept = med),
linetype = "dashed"
) +
geom_point(aes(
shape = type,
colour = type,
size = type,
alpha = type,
fill = type
)) +
theme_bw() +
theme(
legend.position = 'top',
axis.title.x = element_blank(),
legend.title = element_blank()
) +
scale_size_manual(
values = c(
"env" = 2,
"check" = 2,
"line" = 2,
"Shybrid" = 1.3,
"Thybrid" = 1
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_colour_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_fill_manual(
values = c(
'env' = '#e41a1c',
'check' = '#377eb8',
'line' = '#4daf4a',
'Shybrid' = '#984ea3',
'Thybrid' = '#ff7f00'
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_shape_manual(
values = c(
"env" = 17,
"check" = 18,
"line" = 19,
"Shybrid" = 25,
"Thybrid" = 15
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
scale_alpha_manual(
values = c(
"env" = 1,
"check" = .8,
"line" = .8,
"Shybrid" = .6,
"Thybrid" = .6
),
labels = c(
"env" = "Environment",
"check" = "Check",
"line" = "Line",
"Shybrid" = "Single-cross hybrid",
"Thybrid" = "Three-way cross hybrid"
)
) +
geom_label_repel(data = subset(pc.df, type == "line"),
aes(label = level), fill = "#4daf4a",
show.legend = FALSE)We also computed the WAASBY, an index that weight the genotype’s performance and stability (WAASB), as follows:
\[ WAASBY_v = \frac{\breve{g}_v \times \theta + \breve{w}_v \times \left(1-\theta\right)}{\theta + \left(1-\theta\right)} \tag{6}\]
where \(\breve{g}\) and \(\breve{w}\) are performances and WAASBs rescaled to range from 0 to 100, and \(\theta\) is a weight that determines the importance of both performance (\(\theta\) per se) and stability (\(1 - \theta\)). Here, we assumed that \(\theta = 0.75\). The WAASBY is necessary to compute an index that will aid in a selection that considers per-trait performance and stability (next topic).
Codes
theta = .75
nma_up = 100
nmi_up = 0
nma_down = 0
nmi_down = 100
pcgen = pc.df |> filter(type != "env")
pcgen = pcgen |> mutate(scalmug = case_when(
trait == "PG" ~ ((nma_up-nmi_up)/
(max(pcgen[pcgen$trait == "PG", 'mug']) - min(pcgen[pcgen$trait == "PG", 'mug']))) *
(mug - max(pcgen[pcgen$trait == "PG", 'mug'])) + nma_up,
trait == "AE" ~ ((nma_down-nmi_down)/
(max(pcgen[pcgen$trait == "AE", 'mug']) - min(pcgen[pcgen$trait == "AE", 'mug']))) *
(mug - max(pcgen[pcgen$trait == "AE", 'mug'])) + nma_down,
trait == "AP" ~ ((nma_down-nmi_down)/
(max(pcgen[pcgen$trait == "AP", 'mug']) - min(pcgen[pcgen$trait == "AP", 'mug']))) *
(mug - max(pcgen[pcgen$trait == "AP", 'mug'])) + nma_down
),
scalwaasb = case_when(
trait == "PG" ~ ((nma_down-nmi_down)/
(max(pcgen[pcgen$trait == "PG", 'WAASB']) - min(pcgen[pcgen$trait == "PG", 'WAASB']))) *
(WAASB - max(pcgen[pcgen$trait == "PG", 'WAASB'])) + nma_down,
trait == "AE" ~ ((nma_down-nmi_down)/
(max(pcgen[pcgen$trait == "AE", 'WAASB']) - min(pcgen[pcgen$trait == "AE", 'WAASB']))) *
(WAASB - max(pcgen[pcgen$trait == "AE", 'WAASB'])) + nma_down,
trait == "AP" ~ ((nma_down-nmi_down)/
(max(pcgen[pcgen$trait == "AP", 'WAASB']) - min(pcgen[pcgen$trait == "AP", 'WAASB']))) *
(WAASB - max(pcgen[pcgen$trait == "AP", 'WAASB'])) + nma_down
),
WAASBY = scalmug*theta + scalwaasb * (1-theta))
waasbymat = pcgen |> select(level, trait, WAASBY) |>
pivot_wider(names_from = trait, values_from = WAASBY) |>
column_to_rownames('level')3.4 MTSI
Essentially, the multi-trait stability index is a genotype-ideotype Euclidean distance based on factor analysis. First, we computed the correlation between the WAASBY values of each trait. For this, let \(\mathbf{Q}\) be the \(V \times T\) WAASBY matrix. The correlation was computed by:
\[ \mathbf{R} = \mathbf{D}^{-1} \mathbf{C} \mathbf{D}^{-1} \tag{7}\]
where \(\mathbf{D}^{-1}\) is a diagonal matrix whose elements are the standard deviation of the columns of \(\mathbf{Q}\), and \(\mathbf{C}\) is the covariance matrix of \(\mathbf{Q}\). Next, we performed the eigendecomposition of \(\mathbf{R}\), given by:
\[ \mathbf{R} = \mathbf{U} \mathbf{\Lambda} \mathbf{U} \tag{8}\]
where \(\mathbf{U}\) is the matrix of eigenvectors, and \(\mathbf{\Lambda}\) is a diagonal matrix of eigenvalues. We retained the two first eigenvalues, as they explained more than 80% of the total variation; and their corresponding eigenvectors. The \(K\) remaining eigenvectors composed the initial loadings, as follows:
\[ \mathbf{L} = \mathbf{U}_K \mathbf{\Lambda}^{\frac{1}{2}} \tag{9}\] where \(\mathbf{U}_K\) contains the first \(K\) eigenvectors, and \(\mathbf{\Lambda}^{\frac{1}{2}}\) represent the square root of the \(K\) retained eigenvalues. To ensure interpretability, \(\mathbf{L}\) was rotated using the varimax method. After rotation, the canonical loadings (say, \(\ddot{\mathbf{L}}\)) were obtained, from which the matrix of scores (\(\mathbf{F}\)) was computed, as follows:
\[ \mathbf{F} = \breve{\mathbf{Q}} \left(\ddot{\mathbf{L}}^\prime \mathbf{R}\right)^\prime \tag{10}\] where \(\breve{\mathbf{Q}}\) is a matrix of standardized WAASBY means.
In the last step, we calculated the scores of ideotypes. In the case of WAASBY, the ideotype has a value of 100, irrespective of trait. Thus, the matrix of ideotypes is, in fact, a vector of 100s. This vector substitutes \(\breve{\mathbf{Q}}\) in Equation 10 to compute the ideotype scores. Finally, the MTSI is computed as the difference between the genotypes’ and ideotypes’ scores:
\[ MTSI_v = \left[ \sum_{k=1}^K{\left(F_{vk} - \acute{F}_k\right)^2} \right]^{\frac{1}{2}} \tag{11}\] where \(F_{vk}\) is the score of the \(v^{th}\) candidate in the \(k^{th}\) factor, and \(\acute{F}_k\) is the score of the ideotype in the \(k^{th}\) factor. A desirable candidate has a low MTSI (Figure 9), meaning a closer distance to the ideotype.
Codes
mineval = .5
ideotype = c("h", 'h', 'h')
rescaled <- unlist(strsplit(ideotype, split = "\\s*(\\s|,)\\s*")) %>%
all_lower_case()
ideotype.D <- case_when(rescaled == "h" ~ 100, rescaled == "m" ~ 50, rescaled == "l" ~ 0)
names(ideotype.D) <- names(waasbymat)
means <- waasbymat
cor.means <- cor(waasbymat)
eigen.decomposition <- eigen(cor.means)
eigen.values <- eigen.decomposition$values
eigen.vectors <- eigen.decomposition$vectors
colnames(eigen.vectors) <- paste("PC", 1:ncol(cor.means), sep = "")
rownames(eigen.vectors) <- colnames(means)
eigen.values.factors <- as.vector(c(as.matrix(sqrt(eigen.values[eigen.values >=
mineval]))))
initial_loadings <- cbind(eigen.vectors[, eigen.values >=
mineval] * eigen.values.factors)
A <- initial_loadings
partial <- solve_svd(cor.means)
k <- ncol(means)
seq_k <- seq_len(ncol(means))
for (j in seq_k) {
for (i in seq_k) {
if (i == j) {
next
}
else {
partial[i, j] <- -partial[i, j] / sqrt(partial[i, i] * partial[j, j])
}
}
}
colnames(A) <- paste("FA", 1:ncol(initial_loadings), sep = "")
pca <- tibble(
PC = paste("PC", 1:ncol(means), sep = ""),
Eigenvalues = eigen.values,
`Variance (%)` = (eigen.values / sum(eigen.values)) *
100,
`Cum. variance (%)` = cumsum(`Variance (%)`)
)
Communality <- diag(A %*% t(A))
Uniquenesses <- 1 - Communality
fa <- cbind(A, Communality, Uniquenesses) %>% as_tibble(rownames = NA) %>%
rownames_to_column("VAR")
z <- scale(means, center = FALSE, scale = apply(means, 2, sd))
canonical_loadings <- t(t(A) %*% solve_svd(cor.means))
scores <- z %*% canonical_loadings
colnames(scores) <- paste("FA", 1:ncol(scores), sep = "")
pos.var.factor <- which(abs(A) == apply(abs(A), 1, max), arr.ind = TRUE)
var.factor <- lapply(1:ncol(A), function(i) {
rownames(pos.var.factor)[pos.var.factor[, 2] ==
i]
})
names(var.factor) <- paste("FA", 1:ncol(A), sep = "")
names.pos.var.factor <- rownames(pos.var.factor)
names(ideotype.D) <- colnames(means)
ideotypes.matrix <- t(as.matrix(ideotype.D)) / apply(means, 2, sd)
rownames(ideotypes.matrix) <- "ID1"
ideotypes.scores <- ideotypes.matrix %*% canonical_loadings
gen_ide <- sweep(scores, 2, ideotypes.scores, "-")
MTSI <- sort(apply(gen_ide, 1, function(x)
sqrt(sum(x^2))), decreasing = FALSE)
contr.factor <- data.frame((sqrt(gen_ide^2) / apply(gen_ide, 1, function(x)
sum(sqrt(
x^2
)))) * 100) %>% rownames_to_column("GEN") %>%
as_tibble()
df.index = data.frame(MTSI) |> rownames_to_column('geno') |>
left_join(unique(pc.df[, c("level", 'type')]), by = c("geno" = 'level'))
ggarrange(
df.index |> filter(type == "line") |>
ggplot(aes(x = geno, y = MTSI)) +
theme_bw() +
theme(
axis.text.x = element_blank(),
legend.title = element_blank(),
legend.position = 'none'
) +
geom_segment(aes(
x = geno,
xend = geno,
y = 0,
yend = MTSI
), linewidth = 1) +
geom_point(size = 3, color = 'black') +
geom_point(
data = df.index |> filter(type == "line") |>
slice_min(order_by = MTSI, n = 5),
aes(x = geno, y = MTSI, colour = 'Selected'),
size = 3
) +
labs(x = "Lines") +
geom_label(
data = df.index |> filter(type == "line") |>
slice_min(order_by = MTSI, n = 5),
aes(label = geno, y = MTSI),
size = 2.6,
angle = 90,
fill = '#4daf4a',
colour = "white",
fontface = 'bold',
hjust = -.2
) +
scale_colour_manual(values = c('#4daf4a')),
df.index |> filter(type == "Shybrid") |>
ggplot(aes(x = geno, y = MTSI)) +
theme_bw() +
theme(
axis.text.x = element_blank(),
legend.title = element_blank(),
legend.position = 'none'
) +
geom_segment(aes(
x = geno,
xend = geno,
y = 0,
yend = MTSI
), linewidth = 1) +
geom_point(size = 3, color = 'black') +
geom_point(
data = df.index |> filter(type == "Shybrid") |>
slice_min(order_by = MTSI, n = 12),
aes(x = geno, y = MTSI, colour = 'Selected'),
size = 3
) +
labs(x = "Single-cross hybrid") +
geom_label(
data = df.index |> filter(type == "Shybrid") |>
slice_min(order_by = MTSI, n = 12),
aes(label = geno, y = MTSI),
size = 1.56,
angle = 90,
fill = '#984ea3',
colour = "white",
fontface = 'bold',
hjust = -.1,
alpha = .9
) +
scale_colour_manual(values = c('#984ea3')),
df.index |> filter(type == "Thybrid") |>
ggplot(aes(x = geno, y = MTSI)) +
theme_bw() +
theme(
axis.text.x = element_blank(),
legend.title = element_blank(),
legend.position = 'none'
) +
geom_segment(aes(
x = geno,
xend = geno,
y = 0,
yend = MTSI
), linewidth = 1) +
geom_point(size = 3, color = 'black') +
geom_point(
data = df.index |> filter(type == "Thybrid") |>
slice_min(order_by = MTSI, n = 32),
aes(x = geno, y = MTSI, colour = 'Selected'),
size = 3
) +
labs(x = "Three-way cross hybrid") +
geom_label(
data = df.index |> filter(type == "Thybrid") |>
slice_min(order_by = MTSI, n = 32),
aes(label = geno, y = MTSI),
size = 1.5,
angle = 90,
fill = '#ff7f00',
colour = "white",
fontface = 'bold',
hjust = -.2,
alpha = .7
) +
scale_colour_manual(values = c('#ff7f00')),
nrow = 3,
labels = "AUTO"
)The selection of these lines represent an expected genetic gain of (in percentage):
Codes
selline = df.index[which(df.index$type == "line"),]
selline = selline[order(selline$MTSI), 'geno'][1:5]
lapply(main, function(x){
temp = x |> filter(grepl("VML",geno) & !grepl("\\/", geno)) |>
mutate(mtsiel = ifelse(geno %in% selline, 'YES', "NO"))
((mean(temp[temp$mtsiel == "YES", 'mug']) - mean(temp$mug))/mean(temp$mug)) * 100
})$PG
[1] 3.959911
$AE
[1] -2.133564
$AP
[1] 2.204829